from PyCmpltrtok.common import sep


def get_data(xpath):
    """Load tab-separated records from a UTF-8 text file.

    Every non-blank line must consist of exactly three tab-separated
    fields: an integer id, a text field and a third string field
    (presumably an emotion label -- confirm against the file producer).
    Leading/trailing whitespace of each line is stripped before splitting,
    and lines that are empty after stripping are skipped.

    Args:
        xpath: Path of the file to read.

    Returns:
        A list of ``(id, text, label)`` tuples in file order, where ``id``
        is an ``int`` and the other two items are ``str``.

    Raises:
        ValueError: If a non-blank line does not have exactly three fields
            or its first field is not an integer. The message contains the
            path and the 1-based line number of the offending line.
        OSError: If the file cannot be opened.
    """
    xdata = []
    # 'utf-8-sig' decodes plain UTF-8 unchanged but drops a leading BOM,
    # which would otherwise stay glued to the first id and break int().
    with open(xpath, 'r', encoding='utf-8-sig') as f:
        # Text mode already normalises '\r\n' and '\r' to '\n', and strip()
        # removes the terminator, so no manual line-ending trimming is needed.
        for lineno, raw in enumerate(f, 1):
            xline = raw.strip()
            if not xline:
                continue
            fields = xline.split('\t')
            if len(fields) != 3:
                raise ValueError(
                    '{}:{}: expected 3 tab-separated fields, got {}: {!r}'.format(
                        xpath, lineno, len(fields), xline))
            xid, xtext, xem = fields
            try:
                xid = int(xid)
            except ValueError as err:
                raise ValueError(
                    '{}:{}: id field is not an integer: {!r}'.format(
                        xpath, lineno, xid)) from err
            xdata.append((xid, xtext, xem))
    return xdata


if __name__ == '__main__':
    # Manual smoke run: load the train and validation files and show the
    # first five records of each. sep() is the imported PyCmpltrtok helper
    # called between stages (presumably prints a section banner).
    # Alternative dataset prefix: '_save/washed/emotionX7_pd'
    washed_path = '_save/washed/emotionX7'
    washed_path_train = '{}_train.txt'.format(washed_path)
    washed_path_val = '{}_val.txt'.format(washed_path)

    sep('train')
    train_data = get_data(washed_path_train)
    sep('val')
    val_data = get_data(washed_path_val)
    sep('All over')

    for loaded in (train_data, val_data):
        print(loaded[:5])
